NNSVS demos
Singing voice synthesis (SVS) demo using NNSVS. All the models were trained with https://github.com/r9y9/nnsvs/. Recipes to reproduce the experiments are included in that repository.
Preparation
[1]:
%%capture
try:
import nnsvs
except ImportError:
! pip install git+https://github.com/r9y9/nnsvs
[2]:
%pylab inline
%load_ext autoreload
%autoreload
import IPython
from IPython.display import Audio
import numpy as np
import pysinsy
import librosa
from nnmnkwii.io import hts
%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib
[3]:
from nnsvs.pretrained import create_svs_engine
import nnsvs
kiritan_singing
https://zunko.jp/kiridev/login.php
[4]:
# Synthesize the bundled "get_over" example score with the Kiritan model.
engine = create_svs_engine("r9y9/kiritan_latest")

# Example XML score -> full-context labels -> HTS label file.
score_path = nnsvs.util.example_xml_file("get_over")
contexts = pysinsy.extract_fullcontext(score_path)
labels = hts.HTSLabelFile.create_from_contexts(contexts)

# Run synthesis; the second return value is used as the playback rate below.
wav, sr = engine.svs(labels)

# librosa's trim returns (trimmed_signal, interval); keep only the signal.
# top_db=40: audio more than 40 dB below the peak counts as silence.
wav = wav.astype(np.float64)
wav = librosa.effects.trim(wav, top_db=40)[0]

# The last expression renders an audio player as the cell output.
Audio(wav, rate=sr)
Downloading: "https://www.dropbox.com/s/3cteq2rdnkn2ape/kiritan_latest.tar.gz?dl=1"
[4]:
nit-song070
http://hts.sp.nitech.ac.jp/archives/2.3/HTS-demo_NIT-SONG070-F001.tar.bz2
[5]:
# Synthesize the bundled "get_over" example score with the yoko model.
engine = create_svs_engine("r9y9/yoko_latest")

# Example XML score -> full-context labels -> HTS label file.
score_path = nnsvs.util.example_xml_file("get_over")
contexts = pysinsy.extract_fullcontext(score_path)
labels = hts.HTSLabelFile.create_from_contexts(contexts)

# Run synthesis; the second return value is used as the playback rate below.
wav, sr = engine.svs(labels)

# librosa's trim returns (trimmed_signal, interval); keep only the signal.
# top_db=40: audio more than 40 dB below the peak counts as silence.
wav = wav.astype(np.float64)
wav = librosa.effects.trim(wav, top_db=40)[0]

# The last expression renders an audio player as the cell output.
Audio(wav, rate=sr)
Downloading: "https://www.dropbox.com/s/k8mya65yt52m0ps/yoko_latest.tar.gz?dl=1"
[5]:
oniku_kurumi
[6]:
# Synthesize the bundled "get_over" example score with the oniku_kurumi model.
engine = create_svs_engine("r9y9/oniku_kurumi_latest")

# Example XML score -> full-context labels -> HTS label file.
score_path = nnsvs.util.example_xml_file("get_over")
contexts = pysinsy.extract_fullcontext(score_path)
labels = hts.HTSLabelFile.create_from_contexts(contexts)

# Run synthesis; the second return value is used as the playback rate below.
wav, sr = engine.svs(labels)

# librosa's trim returns (trimmed_signal, interval); keep only the signal.
# top_db=40: audio more than 40 dB below the peak counts as silence.
wav = wav.astype(np.float64)
wav = librosa.effects.trim(wav, top_db=40)[0]

# The last expression renders an audio player as the cell output.
Audio(wav, rate=sr)
Downloading: "https://www.dropbox.com/s/qkctk86tec8gpbf/oniku_kurumi_latest.tar.gz?dl=1"
[6]:
jsut-song
[7]:
# Synthesize the bundled "get_over" example score with the jsut model.
engine = create_svs_engine("r9y9/jsut_latest")

# Example XML score -> full-context labels -> HTS label file.
score_path = nnsvs.util.example_xml_file("get_over")
contexts = pysinsy.extract_fullcontext(score_path)
labels = hts.HTSLabelFile.create_from_contexts(contexts)

# Run synthesis; the second return value is used as the playback rate below.
wav, sr = engine.svs(labels)

# librosa's trim returns (trimmed_signal, interval); keep only the signal.
# top_db=40: audio more than 40 dB below the peak counts as silence.
wav = wav.astype(np.float64)
wav = librosa.effects.trim(wav, top_db=40)[0]

# The last expression renders an audio player as the cell output.
Audio(wav, rate=sr)
Downloading: "https://www.dropbox.com/s/xj5vdnfmw8k2yxq/jsut_latest.tar.gz?dl=1"
[7]: